November 6, 2024
Two Sample t-test
data: vep16_turnout by south
t = 2.3273, df = 48, p-value = 0.02421
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
0.5865435 8.0340480
sample estimates:
mean in group 0 mean in group 1
63.03529 58.72500
Call:
lm(formula = vep16_turnout ~ south, data = states)
Residuals:
Min 1Q Median 3Q Max
-20.0353 -4.3103 0.5699 4.6147 11.7647
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 63.035 1.048 60.167 <2e-16 ***
south -4.310 1.852 -2.327 0.0242 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.109 on 48 degrees of freedom
Multiple R-squared: 0.1014, Adjusted R-squared: 0.08268
F-statistic: 5.417 on 1 and 48 DF, p-value: 0.02421
# Multiple regression: vep16_turnout regressed on south, ba_or_more_2015,
# and hispanicpct_2016.
# Note: `digits` is an argument of the print method for lm summaries, not of
# summary() itself; passing it to summary() is silently ignored, so it is
# omitted here. To control printed precision, use
# print(summary(model), digits = 4).
summary(
  lm(vep16_turnout ~ south + ba_or_more_2015 + hispanicpct_2016, data = states)
)
Call:
lm(formula = vep16_turnout ~ south + ba_or_more_2015 + hispanicpct_2016,
data = states)
Residuals:
Min 1Q Median 3Q Max
-20.6360 -2.1145 -0.0495 2.3282 8.9824
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 44.65209 4.89562 9.121 6.98e-12 ***
south -2.15840 1.66087 -1.300 0.20023
ba_or_more_2015 0.66923 0.15497 4.319 8.29e-05 ***
hispanicpct_2016 -0.19519 0.07114 -2.744 0.00863 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.053 on 46 degrees of freedom
Multiple R-squared: 0.4109, Adjusted R-squared: 0.3725
F-statistic: 10.7 on 3 and 46 DF, p-value: 1.879e-05
# A tibble: 3 × 3
Race3 n pct
<dbl+lbl> <int> <dbl>
1 1 [White] 3038 78.2
2 2 [Black] 398 10.2
3 3 [Hispanic] 450 11.6
# A tibble: 2 × 3
white n pct
<dbl> <int> <dbl>
1 0 848 21.8
2 1 3038 78.2
# A tibble: 3 × 3
Race3 n pct
<dbl+lbl> <int> <dbl>
1 1 [White] 3038 78.2
2 2 [Black] 398 10.2
3 3 [Hispanic] 450 11.6
# A tibble: 2 × 3
black n pct
<dbl> <int> <dbl>
1 0 3488 89.8
2 1 398 10.2
# A tibble: 3 × 3
Race3 n pct
<dbl+lbl> <int> <dbl>
1 1 [White] 3038 78.2
2 2 [Black] 398 10.2
3 3 [Hispanic] 450 11.6
# A tibble: 2 × 3
hisp n pct
<dbl> <int> <dbl>
1 0 3436 88.4
2 1 450 11.6
Call:
lm(formula = ft_Dem ~ black + hisp, data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.7405 0.5177 82.55 <2e-16 ***
black 31.1787 1.5148 20.58 <2e-16 ***
hisp 17.0284 1.4507 11.74 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
Call:
lm(formula = ft_Dem ~ black + white, data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 59.769 1.355 44.104 < 2e-16 ***
black 14.150 1.965 7.199 7.24e-13 ***
white -17.028 1.451 -11.738 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
Call:
lm(formula = ft_Dem ~ hisp + white, data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 73.919 1.424 51.924 < 2e-16 ***
hisp -14.150 1.965 -7.199 7.24e-13 ***
white -31.179 1.515 -20.582 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
# Let as.factor() expand Race3 into dummy variables inside the lm() call;
# the first level (1) is the omitted reference category.
lm(ft_Dem ~ as.factor(Race3), data = nes) |>
  summary()
Call:
lm(formula = ft_Dem ~ as.factor(Race3), data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.7405 0.5177 82.55 <2e-16 ***
as.factor(Race3)2 31.1787 1.5148 20.58 <2e-16 ***
as.factor(Race3)3 17.0284 1.4507 11.74 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
# Same factor-based model, but relevel() makes category 3 of Race3 the
# reference group, so the coefficients are contrasts against category 3.
lm(ft_Dem ~ relevel(as.factor(Race3), ref = "3"), data = nes) |>
  summary()
Call:
lm(formula = ft_Dem ~ relevel(as.factor(Race3), ref = "3"), data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 59.769 1.355 44.104 < 2e-16 ***
relevel(as.factor(Race3), ref = "3")1 -17.028 1.451 -11.738 < 2e-16 ***
relevel(as.factor(Race3), ref = "3")2 14.150 1.965 7.199 7.24e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
# Same factor-based model, but relevel() makes category 2 of Race3 the
# reference group, so the coefficients are contrasts against category 2.
lm(ft_Dem ~ relevel(as.factor(Race3), ref = "2"), data = nes) |>
  summary()
Call:
lm(formula = ft_Dem ~ relevel(as.factor(Race3), ref = "2"), data = nes)
Residuals:
Min 1Q Median 3Q Max
-73.919 -26.740 0.231 23.156 57.260
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 73.919 1.424 51.924 < 2e-16 ***
relevel(as.factor(Race3), ref = "2")1 -31.179 1.515 -20.582 < 2e-16 ***
relevel(as.factor(Race3), ref = "2")3 -14.150 1.965 -7.199 7.24e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 28.33 on 3824 degrees of freedom
(444 observations deleted due to missingness)
Multiple R-squared: 0.1177, Adjusted R-squared: 0.1172
F-statistic: 255 on 2 and 3824 DF, p-value: < 2.2e-16
# Specify Race3 using as_factor(); need this for post-estimation
a <- lm(
  ft_Dem ~ as_factor(Race3) + libcon7 + Female + educ4,
  data = nes
)
# `digits` belongs to the print method for lm summaries, not to summary();
# passing it to summary() is silently ignored and the table prints at the
# default precision. The ineffective argument is therefore dropped. Use
# print(summary(a), digits = 3) to actually reduce the printed precision.
summary(a)
Call:
lm(formula = ft_Dem ~ as_factor(Race3) + libcon7 + Female + educ4,
data = nes)
Residuals:
Min 1Q Median 3Q Max
-90.976 -15.510 1.842 15.775 77.511
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 88.6869 1.7027 52.087 < 2e-16 ***
as_factor(Race3)Black 24.5347 1.2952 18.943 < 2e-16 ***
as_factor(Race3)Hispanic 14.0080 1.2418 11.281 < 2e-16 ***
libcon7 -10.8536 0.2645 -41.038 < 2e-16 ***
Female 3.3306 0.7729 4.309 1.68e-05 ***
educ4 -0.5379 0.3891 -1.383 0.167
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 23.36 on 3698 degrees of freedom
(567 observations deleted due to missingness)
Multiple R-squared: 0.4026, Adjusted R-squared: 0.4018
F-statistic: 498.5 on 5 and 3698 DF, p-value: < 2.2e-16
# Bar graph of the estimates in `race`, one bar per Race3 category, with
# error bars from conf.low/conf.high and each bar labelled with its estimate
# rounded to two decimals.
race |>
  ggplot(aes(
    x = as.factor(Race3), y = estimate,
    ymin = conf.low, ymax = conf.high,
    label = round(estimate, digits = 2)
  )) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity")
  geom_col(alpha = 0.8, width = 0.5) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # as of ggplot2 3.4.0
  geom_errorbar(width = 0.08, linewidth = 0.2) +
  labs(x = NULL, y = "Predicted Democratic Thermometer") +
  scale_x_discrete(
    breaks = c("1", "2", "3"),
    labels = c("White", "Black", "Hispanic")
  ) +
  # Positive vjust shifts the labels downward; presumably chosen so the white
  # text sits inside the bars -- confirm against the rendered plot
  geom_text(vjust = 6, color = "white", size = 3.5)